{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72f0965b-05dd-4ad5-85fc-13beff18b913",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import platform\n",
    "import numpy as np\n",
    "import statsmodels.formula.api as smf\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3dd0c47a-a46e-48e8-983e-e3dbd6f1614f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(r\"E:\\\\df.xlsx\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "355a00e5-8f54-4010-8ebf-b33a57e657ed",
   "metadata": {},
   "source": [
    "## 1. Analysis 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4859972-6252-4ac7-92ed-caa29a1b83a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "d = df.copy()\n",
    "\n",
    "# Coerce numeric columns if needed (safe cast)\n",
    "num_cols = ['age_category','income','education','marriage','urban','ideology','year']\n",
    "for c in num_cols:\n",
    "    if c in d.columns:\n",
    "        d[c] = pd.to_numeric(d[c], errors='coerce')\n",
    "\n",
    "# Factor (categorical) columns; fill missing with a string label\n",
    "factor_cols = ['region','religion','year']  # year is treated as C(year) in the model\n",
    "for c in factor_cols:\n",
    "    if c in d.columns:\n",
    "        d[c] = d[c].astype('object').fillna('Missing').astype('category')\n",
    "\n",
    "# Make sure age_category is integer (1..6)\n",
    "d['age_category'] = d['age_category'].astype('Int64')\n",
    "\n",
    "# -------------------------------------------------------------------\n",
    "# 1) Helper: compute margins at means for age_category = 1..6\n",
    "#    using model's encoded X and get_prediction(..., transform=False)\n",
    "# -------------------------------------------------------------------\n",
    "def margins_atmeans_over_age(\n",
    "    data: pd.DataFrame,\n",
    "    outcome: str,\n",
    "    region_col='region',\n",
    "    relig_col='religion'\n",
    "):\n",
    "    \"\"\"\n",
    "    Fits OLS: outcome ~ age_category + income + education + marriage + urban\n",
    "                     + ideology + C(region) + C(religion) + C(year)\n",
    "    Then computes 'margins, at(age_category=1/…/6) atmeans' by:\n",
    "      - taking the mean (or baseline) of the encoded design matrix,\n",
    "      - setting age_category to 1..6 in that encoded space,\n",
    "      - predicting with transform=False (so no Patsy re-encode needed).\n",
    "    Returns dict with ages, mean, lo, hi, and the fitted model.\n",
    "    \"\"\"\n",
    "\n",
    "    # Drop rows with missing DV or age_category\n",
    "    sub = data.dropna(subset=[outcome, 'age_category']).copy()\n",
    "\n",
    "    # Fit OLS with categorical indicators (Stata's i.)\n",
    "    formula = (\n",
    "        f\"{outcome} ~ age_category + income + education + marriage + urban \"\n",
    "        f\"+ ideology + C({relig_col}) + C({region_col}) + C(year)\"\n",
    "    )\n",
    "    model = smf.ols(formula, data=sub).fit(cov_type='HC1')\n",
    "\n",
    "    # Build a \"base\" row in *encoded* (design) space: means across columns\n",
    "    exog = pd.DataFrame(model.model.exog, columns=model.model.exog_names)\n",
    "    base = exog.mean()\n",
    "\n",
    "    # We must be able to set the raw variable's column in encoded space.\n",
    "    # Because age_category is numeric (not C(age_category)), there *will be*\n",
    "    # a column literally named 'age_category' in exog.\n",
    "    if 'age_category' not in exog.columns:\n",
    "        raise ValueError(\"The design matrix does not contain 'age_category' \"\n",
    "                         \"(did you accidentally use C(age_category) in the formula?)\")\n",
    "\n",
    "    def predict_at_age(a_val: int):\n",
    "        row = base.copy()\n",
    "        row['age_category'] = a_val\n",
    "        # Important: transform=False, because 'row' is already encoded like exog\n",
    "        pr = model.get_prediction(row.to_frame().T, transform=False)\n",
    "        mean = float(pr.predicted_mean[0])\n",
    "        lo, hi = [float(x) for x in pr.conf_int(alpha=0.05)[0]]\n",
    "        return mean, lo, hi\n",
    "\n",
    "    ages = np.arange(1, 7)  # 1..6\n",
    "    preds = [predict_at_age(int(a)) for a in ages]\n",
    "\n",
    "    out = {\n",
    "        'ages': ages,\n",
    "        'mean': np.array([p[0] for p in preds]),\n",
    "        'lo':   np.array([p[1] for p in preds]),\n",
    "        'hi':   np.array([p[2] for p in preds]),\n",
    "        'model': model\n",
    "    }\n",
    "    return out\n",
    "\n",
    "# -------------------------------------------------------------------\n",
    "# 2) Run for the three outcomes\n",
    "# -------------------------------------------------------------------\n",
    "outcomes = [\n",
    "    ('unification_economic_inequality', '(a) Economic inequality'),\n",
    "    ('unification_regionalism',          '(b) Regionalism'),\n",
    "    ('unification_political_polarization','(c) Political polarization'),\n",
    "]\n",
    "\n",
    "results = [margins_atmeans_over_age(d, oc[0]) for oc in outcomes]\n",
    "\n",
    "# -------------------------------------------------------------------\n",
    "# 3) Plot like Stata marginsplot with rarea CI\n",
    "# -------------------------------------------------------------------\n",
    "plt.rcParams.update({\n",
    "    'font.size': 12,\n",
    "    'axes.titlesize': 14,\n",
    "    'axes.labelsize': 12,\n",
    "    'xtick.labelsize': 11,\n",
    "    'ytick.labelsize': 11\n",
    "})\n",
    "\n",
    "fig, axes = plt.subplots(1, 3, figsize=(15, 4.8), sharey=False)\n",
    "\n",
    "for ax, res, (_, title) in zip(axes, results, outcomes):\n",
    "    ages = res['ages']\n",
    "    mean = res['mean']\n",
    "    lo   = res['lo']\n",
    "    hi   = res['hi']\n",
    "\n",
    "    # CI ribbon\n",
    "    ax.fill_between(ages, lo, hi, alpha=0.25, linewidth=0, label='95% CI')\n",
    "\n",
    "    # Points and line\n",
    "    ax.plot(ages, mean, marker='o', linewidth=1.5)\n",
    "    ax.scatter(ages, mean, s=50)\n",
    "\n",
    "    # Cosmetic tweaks to mimic marginsplot\n",
    "    ax.set_title(title, size = 22, pad= 14)\n",
    "    ax.set_xlabel('Age category (1–6)' , size = 16)\n",
    "    ax.set_xlim(0.8, 6.2)\n",
    "    ax.grid(True, axis='y', linestyle=':', alpha=0.5)\n",
    "\n",
    "    # Optional: tidy y-labels\n",
    "    ax.set_ylabel('Predicted value', size = 13)\n",
    "\n",
    "# Legend once\n",
    "axes[0].legend(frameon=False, loc='best')\n",
    "\n",
    "plt.tight_layout()\n",
    "fig.savefig(dpi=600, bbox_inches=\"tight\", fname = \"Figure_1\")\n",
    "\n",
    "plt.show()\n",
    "\n",
    "# (Optional) print brief model summaries:\n",
    "for (name, _), res in zip(outcomes, results):\n",
    "    print(f\"\\n=== {name} ===\")\n",
    "    print(res['model'].summary().tables[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3eccd49a-19e7-4d27-97f2-6fc981b9dba0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "d8213993-ae7e-49c2-bdca-8d87d95268a3",
   "metadata": {},
   "source": [
    "## 2. Analysis 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c0a4c96-33c6-4c0f-aec9-feaa7923455b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Automatically use a Korean font if available\n",
    "if platform.system() == 'Windows':\n",
    "    plt.rc('font', family='Malgun Gothic')  # Windows default\n",
    "elif platform.system() == 'Darwin':  # macOS\n",
    "    plt.rc('font', family='AppleGothic')\n",
    "else:\n",
    "    # For Linux or custom environments\n",
    "    font_path = r'C:\\글꼴\\nanum-all_new\\나눔고딕\\NanumFontSetup_OTF_GOTHIC\\NanumGothic.ttf'  # Example path\n",
    "    fontprop = fm.FontProperties(fname=font_path)\n",
    "    plt.rc('font', family=fontprop.get_name())\n",
    "\n",
    "# Avoid minus sign breaking on axis\n",
    "plt.rcParams['axes.unicode_minus'] = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16b25a18-9539-4c77-829c-d881809cf89a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 연령대 범주화 함수 정의 및 적용\n",
    "def categorize_age(age):\n",
    "    if age < 30:\n",
    "        return '20s'\n",
    "    elif age < 40:\n",
    "        return '30s'\n",
    "    elif age < 50:\n",
    "        return '40s'\n",
    "    elif age < 60:\n",
    "        return '50s'\n",
    "    elif age < 70:\n",
    "        return '60s'\n",
    "    else:\n",
    "        return '70+'\n",
    "\n",
    "df['age_group'] = df['age'].apply(categorize_age)\n",
    "\n",
    "# 20대 응답자 필터링\n",
    "df_20s = df[df['age_group'] == '20s']\n",
    "\n",
    "# 연도별 평균값 계산\n",
    "yearly_means = df_20s.groupby('year')[\n",
    "    ['unification_economic_inequality', 'unification_regionalism', 'unification_political_polarization']\n",
    "].mean()\n",
    "\n",
    "# 플롯 구성\n",
    "fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharex=True, sharey=True)\n",
    "\n",
    "# 변수 이름과 제목 정의\n",
    "variables = [\n",
    "    ('unification_economic_inequality', '(a) Economic inequality'),\n",
    "    ('unification_regionalism', '(b) Regionalism'),\n",
    "    ('unification_political_polarization', '(c) Political polarization')\n",
    "]\n",
    "\n",
    "# 그래프 그리기\n",
    "for ax, (col, title) in zip(axes, variables):\n",
    "    ax.plot(yearly_means.index, yearly_means[col], marker='o', linewidth=2, color='steelblue')\n",
    "    for x, y in zip(yearly_means.index, yearly_means[col]):\n",
    "        ax.text(x, y + 0.02, f'{y:.2f}', ha='center', fontsize=18)\n",
    "    ax.set_title(title, fontsize=30, pad= 20)\n",
    "    ax.set_xlabel('', fontsize=14)\n",
    "    ax.tick_params(axis='both', labelsize=16)\n",
    "    ax.grid(True, linestyle='--', alpha=0.7)\n",
    "\n",
    "# y축 공통 라벨 설정\n",
    "axes[0].set_ylabel('Average Score (20s)', fontsize=14)\n",
    "\n",
    "# 전체 제목 및 레이아웃 조정\n",
    "plt.suptitle('', fontsize=18)\n",
    "plt.tight_layout(rect=[0, 0, 1, 0.95])\n",
    "plt.savefig(\"Figure_1.jpg\", dpi=600)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6ab9295-306b-4699-85e9-6a41809c554c",
   "metadata": {},
   "source": [
    "### ANOVA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31fd8b0d-bc5c-45fe-aa3e-f0ada183a12c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.stats import f_oneway"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d7f47e6-aba6-4731-8770-fe235bec059d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Categorize age into groups\n",
    "def categorize_age(age):\n",
    "    if age < 30:\n",
    "        return '20s'\n",
    "    elif age < 40:\n",
    "        return '30s'\n",
    "    elif age < 50:\n",
    "        return '40s'\n",
    "    elif age < 60:\n",
    "        return '50s'\n",
    "    elif age < 70:\n",
    "        return '60s'\n",
    "    else:\n",
    "        return '70+'\n",
    "\n",
    "# Apply age group categorization\n",
    "df['age_group'] = df['age'].apply(categorize_age)\n",
    "\n",
    "# Filter for 20s only\n",
    "df_20s = df[df['age_group'] == '20s']\n",
    "\n",
    "# Group economic inequality scores by year\n",
    "groups = [\n",
    "    group['unification_economic_inequality'].dropna()\n",
    "    for _, group in df_20s.groupby('year')\n",
    "]\n",
    "\n",
    "# Run one-way ANOVA\n",
    "f_stat, p_value = f_oneway(*groups)\n",
    "\n",
    "# Print results\n",
    "print(f\"F-statistic: {f_stat:.4f}\")\n",
    "print(f\"p-value: {p_value:.6f}\")\n",
    "\n",
    "# Optional: Interpret result\n",
    "if p_value < 0.05:\n",
    "    print(\"→ Statistically significant differences across years.\")\n",
    "else:\n",
    "    print(\"→ No statistically significant difference across years.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8c3b4e45-1ed8-4b3a-a6c9-d21284ee16b6",
   "metadata": {},
   "source": [
    "### Dunn's Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e228efe-8a45-4535-993f-e1fc41b8bef9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import scikit_posthocs as sp\n",
    "\n",
    "# Subset the 20s data\n",
    "df_20s = df[df['age_group'] == '20s'][['year', 'unification_economic_inequality']].dropna()\n",
    "\n",
    "# Run Dunn's post-hoc test with Bonferroni correction\n",
    "dunn_result = sp.posthoc_dunn(\n",
    "    df_20s, \n",
    "    val_col='unification_economic_inequality', \n",
    "    group_col='year', \n",
    "    p_adjust='bonferroni'\n",
    ")\n",
    "\n",
    "# Display the matrix of p-values\n",
    "print(dunn_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f56af25a-7a13-4e9f-bcf2-f13c58122530",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 나이 그룹 구분\n",
    "def categorize_age(age):\n",
    "    if age < 30:\n",
    "        return '20s'\n",
    "    elif age < 40:\n",
    "        return '30s'\n",
    "    elif age < 50:\n",
    "        return '40s'\n",
    "    elif age < 60:\n",
    "        return '50s'\n",
    "    elif age < 70:\n",
    "        return '60s'\n",
    "    else:\n",
    "        return '70+'\n",
    "\n",
    "# age_group 변수 생성\n",
    "df['age_group'] = df['age'].apply(categorize_age)\n",
    "\n",
    "# 20대 필터링\n",
    "df_20s = df[df['age_group'] == '20s']\n",
    "\n",
    "# 연도별 그룹 생성\n",
    "groups = [\n",
    "    group['unification_regionalism'].dropna()\n",
    "    for _, group in df_20s.groupby('year')\n",
    "]\n",
    "\n",
    "# ANOVA 수행\n",
    "f_stat, p_value = f_oneway(*groups)\n",
    "print(f\"F-statistic: {f_stat:.4f}\")\n",
    "print(f\"p-value: {p_value:.6f}\")\n",
    "\n",
    "if p_value < 0.05:\n",
    "    print(\"→ 연도별 유의미한 차이 있음.\")\n",
    "else:\n",
    "    print(\"→ 연도별 통계적으로 유의미한 차이 없음.\")\n",
    "\n",
    "# Dunn 사후검정\n",
    "df_20s_posthoc = df_20s[['year', 'unification_regionalism']].dropna()\n",
    "dunn_result = sp.posthoc_dunn(\n",
    "    df_20s_posthoc, \n",
    "    val_col='unification_regionalism', \n",
    "    group_col='year', \n",
    "    p_adjust='bonferroni'\n",
    ")\n",
    "\n",
    "# 결과 출력\n",
    "print(dunn_result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23a39b5e-ff64-4558-a3f2-5767efb2c734",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 나이 그룹 구분\n",
    "def categorize_age(age):\n",
    "    if age < 30:\n",
    "        return '20s'\n",
    "    elif age < 40:\n",
    "        return '30s'\n",
    "    elif age < 50:\n",
    "        return '40s'\n",
    "    elif age < 60:\n",
    "        return '50s'\n",
    "    elif age < 70:\n",
    "        return '60s'\n",
    "    else:\n",
    "        return '70+'\n",
    "\n",
    "# age_group 변수 생성\n",
    "df['age_group'] = df['age'].apply(categorize_age)\n",
    "\n",
    "# 20대 필터링\n",
    "df_20s = df[df['age_group'] == '20s']\n",
    "\n",
    "# 연도별 그룹 생성\n",
    "groups = [\n",
    "    group['unification_political_polarization'].dropna()\n",
    "    for _, group in df_20s.groupby('year')\n",
    "]\n",
    "\n",
    "# ANOVA 수행\n",
    "f_stat, p_value = f_oneway(*groups)\n",
    "print(f\"F-statistic: {f_stat:.4f}\")\n",
    "print(f\"p-value: {p_value:.6f}\")\n",
    "\n",
    "if p_value < 0.05:\n",
    "    print(\"→ 연도별 유의미한 차이 있음.\")\n",
    "else:\n",
    "    print(\"→ 연도별 통계적으로 유의미한 차이 없음.\")\n",
    "\n",
    "# Dunn 사후검정\n",
    "df_20s_posthoc = df_20s[['year', 'unification_political_polarization']].dropna()\n",
    "dunn_result = sp.posthoc_dunn(\n",
    "    df_20s_posthoc, \n",
    "    val_col='unification_political_polarization', \n",
    "    group_col='year', \n",
    "    p_adjust='bonferroni'\n",
    ")\n",
    "\n",
    "# 결과 출력\n",
    "print(dunn_result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a9d2316-ca25-489d-ac7c-b34a22edd39c",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_1 = pd.read_excel(r\"E:\\불평등 연구\\데이터\\58_청년_통일\\dunn_results_20s_economy.xlsx\", index_col=0)\n",
    "df_2 = pd.read_excel(r\"E:\\불평등 연구\\데이터\\58_청년_통일\\dunn_results_20s_regionalism.xlsx\", index_col=0)\n",
    "df_3 = pd.read_excel(r\"E:\\불평등 연구\\데이터\\58_청년_통일\\dunn_results_20s_polarization.xlsx\", index_col=0)\n",
    "\n",
    "# Optional: enforce numeric year sorting\n",
    "def sort_years(df: pd.DataFrame) -> pd.DataFrame:\n",
    "    idx = pd.Index(df.index).astype(str).astype(int)\n",
    "    cols = pd.Index(df.columns).astype(str).astype(int)\n",
    "    df_sorted = df.copy()\n",
    "    df_sorted.index = idx\n",
    "    df_sorted.columns = cols\n",
    "    df_sorted = df_sorted.sort_index().sort_index(axis=1)\n",
    "    return df_sorted\n",
    "\n",
    "df_1 = sort_years(df_1)\n",
    "df_2 = sort_years(df_2)\n",
    "df_3 = sort_years(df_3)\n",
    "\n",
    "# Ensure identical ordering across all three\n",
    "years = sorted(set(df_1.index) & set(df_1.columns) &\n",
    "               set(df_2.index) & set(df_2.columns) &\n",
    "               set(df_3.index) & set(df_3.columns))\n",
    "df_1 = df_1.loc[years, years]\n",
    "df_2 = df_2.loc[years, years]\n",
    "df_3 = df_3.loc[years, years]\n",
    "\n",
    "# ---- SHOW ONLY LOWER TRIANGLE (and diagonal) ----\n",
    "def mask_to_lower_triangle(df: pd.DataFrame) -> pd.DataFrame:\n",
    "    arr = df.values.copy().astype(float)\n",
    "    # mask upper triangle (j > i) -> NaN\n",
    "    for i in range(arr.shape[0]):\n",
    "        for j in range(arr.shape[1]):\n",
    "            if j > i:\n",
    "                arr[i, j] = np.nan\n",
    "    out = pd.DataFrame(arr, index=df.index, columns=df.columns)\n",
    "    return out\n",
    "\n",
    "df_1_l = mask_to_lower_triangle(df_1)\n",
    "df_2_l = mask_to_lower_triangle(df_2)\n",
    "df_3_l = mask_to_lower_triangle(df_3)\n",
    "\n",
    "# Formatting helper for p-values\n",
    "def fmt_p(p):\n",
    "    if pd.isna(p):\n",
    "        return \"\"\n",
    "    if p < 0.001:\n",
    "        return \"<.001\"\n",
    "    if p < 0.01:\n",
    "        return f\"{p:.3f}\".lstrip(\"0\")\n",
    "    if p < 0.1:\n",
    "        return f\"{p:.3f}\".lstrip(\"0\")\n",
    "    return f\"{p:.3f}\".lstrip(\"0\")\n",
    "\n",
    "def plot_heat(ax, data: pd.DataFrame, title: str):\n",
    "    im = ax.imshow(data.values, aspect=\"equal\")  # default cmap\n",
    "\n",
    "    # Ticks/labels (bigger fonts)\n",
    "    ax.set_xticks(np.arange(len(data.columns)))\n",
    "    ax.set_yticks(np.arange(len(data.index)))\n",
    "    ax.set_xticklabels(data.columns, rotation=90, fontsize=16)\n",
    "    ax.set_yticklabels(data.index, fontsize=16)\n",
    "\n",
    "    # Thin gridlines\n",
    "    ax.set_xticks(np.arange(-.5, len(data.columns), 1), minor=True)\n",
    "    ax.set_yticks(np.arange(-.5, len(data.index), 1), minor=True)\n",
    "    ax.grid(which=\"minor\", linestyle=\"-\", linewidth=0.5, alpha=0.4)\n",
    "    ax.tick_params(which=\"minor\", bottom=False, left=False)\n",
    "\n",
    "    # Annotate each visible (lower-triangle) cell with p-value\n",
    "    cmap = im.get_cmap()\n",
    "    vmin = np.nanmin(data.values)\n",
    "    vmax = np.nanmax(data.values)\n",
    "    norm = plt.Normalize(vmin=vmin, vmax=vmax)\n",
    "\n",
    "    for i in range(data.shape[0]):\n",
    "        for j in range(data.shape[1]):\n",
    "            p = data.iat[i, j]\n",
    "            label = fmt_p(p)\n",
    "            if label == \"\":\n",
    "                continue\n",
    "            rgba = cmap(norm(p))\n",
    "            r, g, b, _ = rgba\n",
    "            luminance = 0.299*r + 0.587*g + 0.114*b\n",
    "            color = \"white\" if luminance < 0.5 else \"black\"\n",
    "            ax.text(j, i, label, ha=\"center\", va=\"center\", fontsize=9, color=color)\n",
    "\n",
    "    ax.set_title(title, pad=16, fontsize=28)\n",
    "    return im\n",
    "\n",
    "# Plot three subplots with a shared colorbar\n",
    "fig, axes = plt.subplots(1, 3, figsize=(18, 6), constrained_layout=True)\n",
    "\n",
    "im1 = plot_heat(axes[0], df_1_l, \"(a) Economic inequality\")\n",
    "im2 = plot_heat(axes[1], df_2_l, \"(b) Regionalism\")\n",
    "im3 = plot_heat(axes[2], df_3_l, \"(c) Political polarization\")\n",
    "\n",
    "# Shared color scale across all three\n",
    "vmin = np.nanmin([df_1_l.values, df_2_l.values, df_3_l.values])\n",
    "vmax = np.nanmax([df_1_l.values, df_2_l.values, df_3_l.values])\n",
    "for im in (im1, im2, im3):\n",
    "    im.set_clim(vmin=vmin, vmax=vmax)\n",
    "\n",
    "# Colorbar with larger text\n",
    "cbar = fig.colorbar(im3, ax=axes.ravel().tolist(), shrink=0.9, pad=0.02)\n",
    "cbar.set_label(\"Adjusted p-value (Dunn’s test)\", fontsize=20)\n",
    "cbar.ax.tick_params(labelsize=16)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32eba0fb-9ff9-43d3-b5db-fd2491231fb2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "1e5cacf1-0e83-4e88-bd3e-c4ad1d2319ea",
   "metadata": {},
   "source": [
    "## 3. Robustness"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72943f2c-a38f-486a-b559-3f776f23c51e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(r\"E:\\\\df.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b9ba558-9f53-40a6-a0ac-b7ff93e3eda6",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\n",
    "        \"Outcome\": [\"Political polarization\", \"Regionalism\", \"Economic inequality\"],\n",
    "        \"Effect\": [0.313,  0.211, 0.276],\n",
    "        \"CI_low\": [0.169,  0.061, 0.125],\n",
    "        \"CI_high\": [0.472, 0.355, 0.417]\n",
    "    })\n",
    "\n",
    "# ----- 2) Prepare data -----\n",
    "y_pos = np.arange(len(df))\n",
    "effects = df[\"Effect\"].values\n",
    "ci_low = df[\"CI_low\"].values\n",
    "ci_high = df[\"CI_high\"].values\n",
    "\n",
    "# Error lengths for errorbar\n",
    "yerr = np.array([effects - ci_low, ci_high - effects])\n",
    "\n",
    "# ----- 3) Plot -----\n",
    "plt.close(\"all\")\n",
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
    "\n",
    "ax.errorbar(\n",
    "    effects, y_pos,\n",
    "    xerr=yerr,\n",
    "    fmt='o', color='black', ecolor='black', elinewidth=1.5, capsize=4,\n",
    "    markersize=6\n",
    ")\n",
    "\n",
    "# Add effect and CI text\n",
    "for i, (eff, lo, hi) in enumerate(zip(effects, ci_low, ci_high)):\n",
    "    ax.text(hi + 0.02, i, f\"{eff:.3f}\\n[{lo:.3f}, {hi:.3f}]\",\n",
    "            va='center', fontsize=12)\n",
    "\n",
    "# ----- 4) Cosmetics -----\n",
    "ax.set_yticks(y_pos)\n",
    "ax.set_yticklabels(df[\"Outcome\"], fontsize=14)\n",
    "ax.set_xlabel(\"Estimated Effect (scale points)\", fontsize=13)\n",
    "ax.set_title(\"Counterfactual Simulation of 2019 Effect (Respondents in their 20s)\", fontsize=14, pad=15)\n",
    "ax.tick_params(axis='x', labelsize=12)\n",
    "ax.axvline(0, color='red', linestyle='--', linewidth=1)\n",
    "fig.tight_layout()\n",
    "\n",
    "# ----- 5) Save -----\n",
    "#out_path = Path(\"/mnt/data/cf_2019_effects_pointplot_20s.png\")\n",
    "fig.savefig(dpi=600, bbox_inches=\"tight\", fname = \"Figure_4\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "194da92b-5774-45b8-96b6-73d331f5b27c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
